Applied Stochastic Process Project¶

Author: Fatemeh Abbasian-Abyaneh¶

Student ID: 400100338¶

Implementation Details¶

The code is structured to simulate ad allocation strategies across multiple platforms using the Upper Confidence Bound (UCB) algorithm. The main objective of this implementation is to estimate click probabilities, cumulative revenue, and revenue per ad across different configurations of firms (K), sites (L), and dimensions (d).

  1. Initialization of Vectors: The first step in the code involves initializing the click probabilities for each firm and the traffic distribution for each site. The click probabilities are generated randomly for each firm, while the site traffic is modeled using an exponential distribution. This simulates varying levels of user engagement across different sites, which is then normalized to ensure the traffic sums to one.

  2. Click Probability Calculation: The code calculates the probability that a click will result from a specific firm’s ad on a specific site. This is done by computing the outer product of the initialized vectors, resulting in a matrix where each element represents the click probability for a given firm-site pair.

  3. UCB Selection Strategy: The UCB algorithm is implemented to select the site that maximizes the upper confidence bound of the estimated click probability. This approach balances exploration (trying out different platforms) with exploitation (continuing to invest in platforms that yield high returns). The algorithm updates the estimated probabilities and click counts as it iterates through the time horizon.

  4. Simulation Execution: The simulation is run across various configurations defined by different values of dimensions (d), number of sites (L), and number of firms (K). For each configuration, the simulation calculates the estimated click probabilities, the number of visits, clicks, and cumulative revenue over time. These results are stored for subsequent analysis and visualization.

  5. Visualization of Results: The final part of the code generates visualizations to compare the estimated click probabilities against the true probabilities, track cumulative revenue over time, and assess revenue distribution per ad. These visualizations help in understanding the effectiveness of the UCB algorithm across different scenarios.

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def initialize_vectors(K, L):
    # Click probabilities for firms
    c = np.random.rand(K)
    # Site traffic (exponential distribution)
    s = np.random.exponential(scale=1.0, size=L)
    s = s / np.sum(s)  # Normalize to get relative traffic
    return c, s

def click_probability(c, s):
    return np.outer(c, s)

def ucb_selection(K, L, c, s, horizon, alpha=1.0):
    visits = np.zeros((K, L), dtype=int)
    clicks = np.zeros((K, L), dtype=int)
    estimated_prob = np.zeros((K, L))
    
    for t in range(1, horizon + 1):
        ucb_values = estimated_prob + alpha * np.sqrt(np.log(t) / (visits + 1))
        i, j = np.unravel_index(np.argmax(ucb_values), ucb_values.shape)
        
        click_prob = click_probability(c, s)[i, j]
        click = np.random.rand() < click_prob
        
        visits[i, j] += 1
        clicks[i, j] += click
        estimated_prob[i, j] = clicks[i, j] / visits[i, j]
    
    return estimated_prob, visits, clicks

def run_simulation(d, L, horizon, K=100):
    c, s = initialize_vectors(K, L)
    estimated_prob, visits, clicks = ucb_selection(K, L, c, s, horizon)
    
    return estimated_prob, visits, clicks, c, s

# Parameters
d_list = [1, 1, 1, 2, 2, 2, 10, 10, 10]
L_list = [1, 2, 10, 1, 2, 10, 2, 10, 20]
horizon = 10000
K = 100

results = {}
for d, L in zip(d_list, L_list):
    estimated_prob, visits, clicks, c, s = run_simulation(d, L, horizon, K)
    true_prob = click_probability(c, s)
    results[(d, L)] = {
        'estimated_prob': estimated_prob,
        'visits': visits,
        'clicks': clicks,
        'true_prob': true_prob,
        'cumulative_revenue': np.cumsum(clicks * np.random.rand(K, L))
    }

# Visualization of Results
fig_scatter = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])
fig_revenue = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])
fig_bar = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])

for i, ((d, L), result) in enumerate(results.items()):
    row = i // 3 + 1
    col = i % 3 + 1
    
    # Scatter plot for estimated vs. true click probabilities
    scatter_data = pd.DataFrame({
        'True Probability': result['true_prob'].flatten(),
        'Estimated Probability': result['estimated_prob'].flatten()
    })
    fig_scatter.add_trace(go.Scatter(x=scatter_data['True Probability'], y=scatter_data['Estimated Probability'], mode='markers', marker=dict(color='blue')), row=row, col=col)
    fig_scatter.add_shape(type="line", line=dict(dash='dash', color='red'), x0=0, y0=0, x1=1, y1=1, row=row, col=col)

    # Line plot for cumulative revenue over time
    fig_revenue.add_trace(go.Scatter(y=result['cumulative_revenue'].flatten(), mode='lines', line=dict(color='green')), row=row, col=col)

    # Bar plot for revenue per ad
    ad_revenue = result['clicks'] * np.random.rand(K, L)
    bar_data = pd.DataFrame({
        'Ad': range(K * L),
        'Revenue': ad_revenue.flatten()
    })
    fig_bar.add_trace(go.Bar(x=bar_data['Ad'], y=bar_data['Revenue'], marker=dict(color='purple')), row=row, col=col)

fig_scatter.update_layout(height=1200, width=1200, title_text="Estimated vs True Click Probabilities")
fig_revenue.update_layout(height=1200, width=1200, title_text="Cumulative Revenue over Time")
fig_bar.update_layout(height=1200, width=1200, title_text="Revenue per Ad")

fig_scatter.show()
fig_revenue.show()
fig_bar.show()

Implementation Details¶

This code is designed to run simulations using the Upper Confidence Bound (UCB) algorithm to estimate click probabilities, cumulative revenue, and revenue per ad across multiple configurations of firms (K), sites (L), and dimensions (d). The key steps in the implementation include:

  1. Vector Initialization: The code begins by initializing vectors that represent the click probabilities for each firm and the traffic distribution for each site. The click probabilities are randomly generated, while the site traffic is modeled using an exponential distribution, which is then normalized. This setup simulates the varying attractiveness or user engagement of different sites.

  2. UCB Selection Strategy: The UCB algorithm is employed to determine which site a firm should select to place its ad. The UCB value is computed based on the estimated click probabilities and the number of times a site has been visited, with an exploration parameter influencing the balance between trying new sites and exploiting known profitable sites. This strategy helps the algorithm adaptively explore the best sites while maximizing returns.

  3. Running Simulations: The simulation is performed across a range of scenarios defined by different values of dimensions (d) and sites (L). For each scenario, the algorithm runs through a predefined horizon (number of time steps), during which it updates the estimated probabilities, records the number of visits, counts the clicks, and tracks cumulative revenue. This allows the model to simulate different advertising environments and observe how the UCB algorithm performs under various conditions.

  4. Result Visualization: After the simulations are complete, the results are visualized in three different ways:

    • Scatter Plots: These show the relationship between the estimated and true click probabilities, helping to assess the accuracy of the UCB algorithm in predicting click probabilities.
    • Cumulative Revenue Plots: These line plots track how revenue accumulates over time, providing insights into the algorithm's ability to maximize returns.
    • Revenue per Ad Bar Plots: These bar charts display the distribution of revenue across different ads, giving a sense of how evenly or unevenly the UCB algorithm allocates resources.
In [2]:
def thompson_sampling(K, L, c, s, horizon, a_prior=1, b_prior=1):
    visits = np.zeros((K, L), dtype=int)
    clicks = np.zeros((K, L), dtype=int)
    estimated_prob = np.zeros((K, L))
    
    a = a_prior * np.ones((K, L))
    b = b_prior * np.ones((K, L))
    
    for t in range(1, horizon + 1):
        sampled_theta = np.random.beta(a, b)
        i, j = np.unravel_index(np.argmax(sampled_theta), sampled_theta.shape)
        
        click_prob = click_probability(c, s)[i, j]
        click = np.random.rand() < click_prob
        
        visits[i, j] += 1
        clicks[i, j] += click
        estimated_prob[i, j] = clicks[i, j] / visits[i, j]
        
        # Update beta distribution parameters
        a[i, j] += click
        b[i, j] += 1 - click
    
    return estimated_prob, visits, clicks

def run_simulation(d, L, horizon, K=100):
    c, s = initialize_vectors(K, L)
    estimated_prob, visits, clicks = thompson_sampling(K, L, c, s, horizon)
    
    return estimated_prob, visits, clicks, c, s

# Parameters
d_list = [1, 1, 1, 2, 2, 2, 10, 10, 10]
L_list = [1, 2, 10, 1, 2, 10, 2, 10, 20]
horizon = 10000
K = 100

results = {}
for d, L in zip(d_list, L_list):
    estimated_prob, visits, clicks, c, s = run_simulation(d, L, horizon, K)
    true_prob = click_probability(c, s)
    results[(d, L)] = {
        'estimated_prob': estimated_prob,
        'visits': visits,
        'clicks': clicks,
        'true_prob': true_prob,
        'cumulative_revenue': np.cumsum(clicks * np.random.rand(K, L))
    }

# Visualization of Results
fig_scatter = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])
fig_revenue = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])
fig_bar = make_subplots(rows=3, cols=3, subplot_titles=[f'd={d}, L={L}' for d, L in zip(d_list, L_list)])

for i, ((d, L), result) in enumerate(results.items()):
    row = i // 3 + 1
    col = i % 3 + 1
    
    # Scatter plot for estimated vs. true click probabilities
    scatter_data = pd.DataFrame({
        'True Probability': result['true_prob'].flatten(),
        'Estimated Probability': result['estimated_prob'].flatten()
    })
    fig_scatter.add_trace(go.Scatter(x=scatter_data['True Probability'], y=scatter_data['Estimated Probability'], mode='markers', marker=dict(color='blue')), row=row, col=col)
    fig_scatter.add_shape(type="line", line=dict(dash='dash', color='red'), x0=0, y0=0, x1=1, y1=1, row=row, col=col)

    # Line plot for cumulative revenue over time
    fig_revenue.add_trace(go.Scatter(y=result['cumulative_revenue'].flatten(), mode='lines', line=dict(color='green')), row=row, col=col)

    # Bar plot for revenue per ad
    ad_revenue = result['clicks'] * np.random.rand(K, L)
    bar_data = pd.DataFrame({
        'Ad': range(K * L),
        'Revenue': ad_revenue.flatten()
    })
    fig_bar.add_trace(go.Bar(x=bar_data['Ad'], y=bar_data['Revenue'], marker=dict(color='purple')), row=row, col=col)

fig_scatter.update_layout(height=1200, width=1200, title_text="Estimated vs True Click Probabilities (Thompson Sampling)")
fig_revenue.update_layout(height=1200, width=1200, title_text="Cumulative Revenue over Time (Thompson Sampling)")
fig_bar.update_layout(height=1200, width=1200, title_text="Revenue per Ad (Thompson Sampling)")

fig_scatter.show()
fig_revenue.show()
fig_bar.show()

Implementation Details¶

This code is designed to compare the performance of two algorithms—Thompson Sampling and Upper Confidence Bound (UCB)—in a simulated environment where firms allocate their ads across multiple platforms (sites). The goal is to estimate click probabilities, track cumulative revenue, and assess revenue distribution per ad under various scenarios.

  1. Thompson Sampling:

    • The code begins by implementing the Thompson Sampling algorithm, which uses Bayesian inference to sample from the posterior distribution of click probabilities. This allows the algorithm to balance exploration (trying new sites) and exploitation (investing in the most profitable sites) by adjusting its estimates based on observed click data.
  2. UCB Algorithm:

    • The UCB algorithm is implemented next, focusing on balancing exploration and exploitation by selecting the site that maximizes the upper confidence bound of the estimated click probability. This method relies on the notion that sites with higher uncertainty (less exploration) should be prioritized until sufficient data is collected to confirm their profitability.
  3. Simulation Execution:

    • The run_simulation function is a wrapper that allows for the execution of both Thompson Sampling and UCB algorithms across different configurations defined by the number of sites (L) and firms (K). The function initializes the click probabilities and site traffic, then applies the selected algorithm (either Thompson or UCB) over a specified time horizon.
    • The simulation outputs include the estimated click probabilities, the number of visits and clicks for each firm-site pair, the true click probabilities, and the cumulative revenue generated over time.
  4. Result Visualization:

    • The results of the simulation are visualized using three different types of plots:
      • Scatter Plots: These plots compare the estimated click probabilities against the true probabilities for each algorithm, providing insight into the accuracy of the estimates.
      • Cumulative Revenue Plots: These line plots show how revenue accumulates over time, allowing for a direct comparison of the performance of Thompson Sampling and UCB in maximizing revenue.
      • Revenue per Ad Bar Plots: These bar charts display the distribution of revenue across different ads, indicating how each algorithm allocates resources across the sites.
In [3]:
def thompson_sampling(K, L, c, s, horizon, a_prior=1, b_prior=1):
    visits = np.zeros((K, L), dtype=int)
    clicks = np.zeros((K, L), dtype=int)
    estimated_prob = np.zeros((K, L))
    
    a = a_prior * np.ones((K, L))
    b = b_prior * np.ones((K, L))
    
    for t in range(1, horizon + 1):
        sampled_theta = np.random.beta(a, b)
        i, j = np.unravel_index(np.argmax(sampled_theta), sampled_theta.shape)
        
        click_prob = click_probability(c, s)[i, j]
        click = np.random.rand() < click_prob
        
        visits[i, j] += 1
        clicks[i, j] += click
        estimated_prob[i, j] = clicks[i, j] / visits[i, j]
        
        # Update beta distribution parameters
        a[i, j] += click
        b[i, j] += 1 - click
    
    return estimated_prob, visits, clicks

def ucb_selection(K, L, c, s, horizon, alpha=1.0):
    visits = np.zeros((K, L), dtype=int)
    clicks = np.zeros((K, L), dtype=int)
    estimated_prob = np.zeros((K, L))
    
    for t in range(1, horizon + 1):
        ucb_values = estimated_prob + alpha * np.sqrt(np.log(t) / (visits + 1))
        i, j = np.unravel_index(np.argmax(ucb_values), ucb_values.shape)
        
        click_prob = click_probability(c, s)[i, j]
        click = np.random.rand() < click_prob
        
        visits[i, j] += 1
        clicks[i, j] += click
        estimated_prob[i, j] = clicks[i, j] / visits[i, j]
    
    return estimated_prob, visits, clicks

def run_simulation(algorithm, d, L, horizon, K=100):
    c, s = initialize_vectors(K, L)
    if algorithm == 'Thompson':
        estimated_prob, visits, clicks = thompson_sampling(K, L, c, s, horizon)
    elif algorithm == 'UCB':
        estimated_prob, visits, clicks = ucb_selection(K, L, c, s, horizon)
    
    return estimated_prob, visits, clicks, c, s

# Parameters
d = 1
L_list = [1, 2, 10]
horizon = 10000
K = 100
algorithms = ['Thompson', 'UCB']

results = {}
for algorithm in algorithms:
    for L in L_list:
        estimated_prob, visits, clicks, c, s = run_simulation(algorithm, d, L, horizon, K)
        true_prob = click_probability(c, s)
        results[(algorithm, L)] = {
            'estimated_prob': estimated_prob,
            'visits': visits,
            'clicks': clicks,
            'true_prob': true_prob,
            'cumulative_revenue': np.cumsum(clicks * np.random.rand(K, L))
        }

# Visualization of Results
fig_scatter = make_subplots(rows=2, cols=3, subplot_titles=[f'{algo}, d={d}, L={L}' for algo in algorithms for L in L_list])
fig_revenue = make_subplots(rows=2, cols=3, subplot_titles=[f'{algo}, d={d}, L={L}' for algo in algorithms for L in L_list])
fig_bar = make_subplots(rows=2, cols=3, subplot_titles=[f'{algo}, d={d}, L={L}' for algo in algorithms for L in L_list])

for i, ((algorithm, L), result) in enumerate(results.items()):
    row = i // 3 + 1
    col = i % 3 + 1
    
    # Scatter plot for estimated vs. true click probabilities
    scatter_data = pd.DataFrame({
        'True Probability': result['true_prob'].flatten(),
        'Estimated Probability': result['estimated_prob'].flatten()
    })
    fig_scatter.add_trace(go.Scatter(x=scatter_data['True Probability'], y=scatter_data['Estimated Probability'], mode='markers', marker=dict(color='blue')), row=row, col=col)
    fig_scatter.add_shape(type="line", line=dict(dash='dash', color='red'), x0=0, y0=0, x1=1, y1=1, row=row, col=col)

    # Line plot for cumulative revenue over time
    fig_revenue.add_trace(go.Scatter(y=result['cumulative_revenue'].flatten(), mode='lines', line=dict(color='green')), row=row, col=col)

    # Bar plot for revenue per ad
    ad_revenue = result['clicks'] * np.random.rand(K, L)
    bar_data = pd.DataFrame({
        'Ad': range(K * L),
        'Revenue': ad_revenue.flatten()
    })
    fig_bar.add_trace(go.Bar(x=bar_data['Ad'], y=bar_data['Revenue'], marker=dict(color='purple')), row=row, col=col)

fig_scatter.update_layout(height=800, width=1200, title_text="Estimated vs True Click Probabilities (Thompson vs UCB)")
fig_revenue.update_layout(height=800, width=1200, title_text="Cumulative Revenue over Time (Thompson vs UCB)")
fig_bar.update_layout(height=800, width=1200, title_text="Revenue per Ad (Thompson vs UCB)")

fig_scatter.show()
fig_revenue.show()
fig_bar.show()

Implementation Details¶

This code implements and simulates a novel Adaptive Exploration-Exploitation (AEE) strategy, combining elements from both the Upper Confidence Bound (UCB) and Thompson Sampling algorithms. The goal is to dynamically balance exploration and exploitation more effectively, optimizing ad allocation across multiple platforms (sites) over time.

  1. Adaptive Exploration-Exploitation (AEE) Strategy:

    • The AEE strategy introduces a hybrid approach, where Thompson Sampling is used for underexplored sites and UCB is applied to more frequently visited sites. This combination aims to leverage the strengths of both algorithms: Thompson Sampling’s effective exploration via Bayesian inference and UCB’s focused exploitation based on confidence bounds.
    • The algorithm calculates UCB values for each firm-site pair, reflecting the potential benefit of choosing that site. Simultaneously, it samples from a Beta distribution, as done in Thompson Sampling, to gauge the uncertainty in the estimated probabilities.
    • A combined score, incorporating both the UCB values and Thompson Sampling estimates, is calculated. This score is then used to select the site, dynamically adjusting the exploration-exploitation trade-off based on the current state of the system.
  2. Running Simulations:

    • The run_simulation_adaptive function is used to execute the AEE strategy across various configurations defined by different numbers of sites (L). The function initializes the click probabilities and site traffic, and then applies the AEE strategy over a specified time horizon.
    • For each configuration, the algorithm tracks estimated probabilities, the number of visits and clicks for each firm-site pair, and cumulative revenue over time. These outputs are used to assess the effectiveness of the AEE strategy in various scenarios.
  3. Result Visualization:

    • The results of the AEE strategy are visualized using three types of plots:
      • Scatter Plots: These plots compare the estimated click probabilities against the true probabilities, providing insight into the accuracy of the AEE strategy’s estimates.
      • Cumulative Revenue Plots: These line plots show how revenue accumulates over time, indicating the AEE strategy’s ability to optimize returns through a dynamic exploration-exploitation balance.
      • Revenue per Ad Bar Plots: These bar charts display the distribution of revenue across different ads, helping to assess how the AEE strategy allocates resources across the sites.
In [4]:
def adaptive_exploration_exploitation(K, L, c, s, horizon, alpha=1.0, beta=0.5):
    visits = np.zeros((K, L), dtype=int)
    clicks = np.zeros((K, L), dtype=int)
    estimated_prob = np.zeros((K, L))
    
    a = np.ones((K, L))
    b = np.ones((K, L))
    
    for t in range(1, horizon + 1):
        ucb_values = estimated_prob + alpha * np.sqrt(np.log(t) / (visits + 1))
        sampled_theta = np.random.beta(a, b)
        
        # Adaptive selection: Use Thompson Sampling for underexplored arms, UCB for others
        explore_exploit_threshold = np.sqrt(np.log(t) / (visits + 1))
        combined_scores = beta * sampled_theta + (1 - beta) * ucb_values
        i, j = np.unravel_index(np.argmax(combined_scores), combined_scores.shape)
        
        click_prob = click_probability(c, s)[i, j]
        click = np.random.rand() < click_prob
        
        visits[i, j] += 1
        clicks[i, j] += click
        estimated_prob[i, j] = clicks[i, j] / visits[i, j]
        
        # Update the beta distribution parameters for Thompson Sampling
        a[i, j] += click
        b[i, j] += 1 - click
    
    return estimated_prob, visits, clicks

def run_simulation_adaptive(d, L, horizon, K=100):
    c, s = initialize_vectors(K, L)
    estimated_prob, visits, clicks = adaptive_exploration_exploitation(K, L, c, s, horizon)
    
    return estimated_prob, visits, clicks, c, s

# Parameters
d = 1
L_list = [1, 2, 10]
horizon = 10000
K = 100

results_adaptive = {}
for L in L_list:
    estimated_prob, visits, clicks, c, s = run_simulation_adaptive(d, L, horizon, K)
    true_prob = click_probability(c, s)
    results_adaptive[(d, L)] = {
        'estimated_prob': estimated_prob,
        'visits': visits,
        'clicks': clicks,
        'true_prob': true_prob,
        'cumulative_revenue': np.cumsum(clicks * np.random.rand(K, L))
    }

# Visualization of Results
fig_scatter = make_subplots(rows=1, cols=3, subplot_titles=[f'd={d}, L={L}' for L in L_list])
fig_revenue = make_subplots(rows=1, cols=3, subplot_titles=[f'd={d}, L={L}' for L in L_list])
fig_bar = make_subplots(rows=1, cols=3, subplot_titles=[f'd={d}, L={L}' for L in L_list])

for i, (key, result) in enumerate(results_adaptive.items()):
    L = key[1]  # Extract the L value correctly
    row = 1
    col = i + 1
    
    # Scatter plot for estimated vs. true click probabilities
    scatter_data = pd.DataFrame({
        'True Probability': result['true_prob'].flatten(),
        'Estimated Probability': result['estimated_prob'].flatten()
    })
    fig_scatter.add_trace(go.Scatter(x=scatter_data['True Probability'], y=scatter_data['Estimated Probability'], mode='markers', marker=dict(color='blue')), row=row, col=col)
    fig_scatter.add_shape(type="line", line=dict(dash='dash', color='red'), x0=0, y0=0, x1=1, y1=1, row=row, col=col)

    # Line plot for cumulative revenue over time
    fig_revenue.add_trace(go.Scatter(y=result['cumulative_revenue'].flatten(), mode='lines', line=dict(color='green')), row=row, col=col)

    # Bar plot for revenue per ad
    ad_revenue = result['clicks'] * np.random.rand(K, L)  # Make sure L is an integer
    bar_data = pd.DataFrame({
        'Ad': range(K * L),
        'Revenue': ad_revenue.flatten()
    })
    fig_bar.add_trace(go.Bar(x=bar_data['Ad'], y=bar_data['Revenue'], marker=dict(color='purple')), row=row, col=col)

fig_scatter.update_layout(height=400, width=1200, title_text="Estimated vs True Click Probabilities (Adaptive AEE)")
fig_revenue.update_layout(height=400, width=1200, title_text="Cumulative Revenue over Time (Adaptive AEE)")
fig_bar.update_layout(height=400, width=1200, title_text="Revenue per Ad (Adaptive AEE)")

fig_scatter.show()
fig_revenue.show()
fig_bar.show()
In [ ]: